!pip install keras_cv
Application of Deep Learning in Sports
Setup
!pip install ultralytics
import os
os.environ["KERAS_BACKEND"] = "jax" # @param ["tensorflow", "jax", "torch"]
from tensorflow import data as tf_data
import tensorflow_datasets as tfds
import keras
import keras_cv
import numpy as np
from keras_cv import bounding_box
import os
from keras_cv import visualization
import tqdm
Using JAX backend.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
Convert video to frames
import cv2

# Split the demo video into individual JPEG frames saved on Drive.
vidcap = cv2.VideoCapture('Man_Ch_demo.mp4')
success, image = vidcap.read()
count = 0
while success:
    # save frame as JPEG file
    cv2.imwrite("/content/drive/MyDrive/frames/frame%d.jpg" % count, image)
    success, image = vidcap.read()
    print('Read a new frame: ', success)
    count += 1
vidcap.release()  # fix: release the capture handle when done

# first model - yolo_v8_m_pascalvoc
# Load a YOLOV8-medium detector pretrained on Pascal VOC; predictions come
# back in "xywh" bounding-box format. Weights download from Kaggle on first
# use. (Removed the download-log residue that was fused into this cell.)
pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
    "yolo_v8_m_pascalvoc", bounding_box_format="xywh"
)
filepath = "/content/drive/MyDrive/frames/frame0.jpg"
image = keras.utils.load_img(filepath)
image = np.array(image)
inference_resizing = keras_cv.layers.Resizing(
640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
)
image_batch = inference_resizing([image])
y_pred = pretrained_model.predict(image_batch)
# y_pred is a bounding box Tensor:
# {"classes": ..., boxes": ...}
output = visualization.plot_bounding_box_gallery(
image_batch,
value_range=(0, 265),
rows=1,
cols=1,
y_pred=y_pred,
scale=15,
font_scale=0.5,
bounding_box_format="xywh",
class_mapping=class_mapping,
)filepath = "/content/drive/MyDrive/frames/frame0.jpg"
image = keras.utils.load_img(filepath)
image = np.array(image)

# Show the raw frame before any detection is drawn.
visualization.plot_image_gallery(
    np.array([image]),
    value_range=(0, 255),  # fix: was (0, 256); 8-bit pixel values span 0-255
    rows=1,
    cols=1,
    scale=5,
)

inference_resizing = keras_cv.layers.Resizing(
    640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
)
image_batch = inference_resizing([image])

# Pascal VOC class names in label-id order. "Total" is the extra 21st entry
# used by the keras-cv object-detection tutorial; it is never predicted.
class_ids = [
    "Aeroplane",
    "Bicycle",
    "Bird",
    "Boat",
    "Bottle",
    "Bus",
    "Car",
    "Cat",
    "Chair",
    "Cow",
    "Dining Table",
    "Dog",
    "Horse",
    "Motorbike",
    "Person",
    "Potted Plant",
    "Sheep",
    "Sofa",
    "Train",
    "Tvmonitor",
    "Total",
]
# Map label id -> human-readable name for the visualization helpers.
class_mapping = dict(zip(range(len(class_ids)), class_ids))

y_pred = pretrained_model.predict(image_batch)
# y_pred is a bounding box dict: {"classes": ..., "boxes": ...}
output = visualization.plot_bounding_box_gallery(
    image_batch,
    value_range=(0, 255),  # fix: was (0, 265); 8-bit pixel values span 0-255
    rows=1,
    cols=1,
    y_pred=y_pred,
    scale=15,
    font_scale=0.3,
    bounding_box_format="xywh",
    class_mapping=class_mapping,
    line_thickness=1,
)

import matplotlib.pyplot as plt

# Persist the annotated gallery figure to Drive.
output.savefig("/content/drive/MyDrive/output/frame0.jpg", format='jpg')

# loop through and annotate each frame
# Annotate every extracted frame with the pretrained detector and save the
# result to Drive.
inference_resizing = keras_cv.layers.Resizing(
    640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
)

# Loop through each frame from 0 to 449
for i in range(450):
    # Load and resize the current frame
    filepath = f"/content/drive/MyDrive/frames/frame{i}.jpg"
    image = np.array(keras.utils.load_img(filepath))
    image_batch = inference_resizing([image])
    # Perform prediction
    y_pred = pretrained_model.predict(image_batch)
    # Plot bounding boxes on the image
    output = visualization.plot_bounding_box_gallery(
        image_batch,
        value_range=(0, 255),  # fix: was (0, 265); 8-bit pixel values span 0-255
        rows=1,
        cols=1,
        y_pred=y_pred,
        scale=15,
        font_scale=0.3,
        bounding_box_format="xywh",
        class_mapping=class_mapping,
        line_thickness=1,
    )
    # Save the output image
    output.savefig(f"/content/drive/MyDrive/output/output_frame{i}.jpg", format='jpg')
    # fix: close the figure — 450 open matplotlib figures exhaust memory
    plt.close(output)

# Compile frames into a video
import cv2
import os

# Stitch the annotated frames back into an .mp4.
frames_dir = "/content/drive/MyDrive/output"
output_video_path = "/content/drive/MyDrive/output_video.mp4"

# fix: the conventional FourCC for .mp4 output is lowercase "mp4v"
codec = cv2.VideoWriter_fourcc(*"mp4v")
fps = 30

# Use the first frame's dimensions to configure the video writer.
first_frame_path = os.path.join(frames_dir, "output_frame0.jpg")
first_frame = cv2.imread(first_frame_path)
if first_frame is None:  # cv2.imread returns None on a missing/bad file
    raise FileNotFoundError(first_frame_path)
height, width, _ = first_frame.shape

out = cv2.VideoWriter(output_video_path, codec, fps, (width, height))
# Write consecutive frames until the sequence ends.
i = 0
while True:
    frame_path = os.path.join(frames_dir, f"output_frame{i}.jpg")
    if not os.path.exists(frame_path):
        break
    out.write(cv2.imread(frame_path))
    i += 1
out.release()

from IPython.display import HTML
from base64 import b64encode
import os

# Transcode to H.264 so the notebook's <video> tag can play it, then embed
# the result inline as a base64 data URL.
save_path = "/content/drive/MyDrive/output_video.mp4"
compressed_path = "/content/drive/MyDrive/output_video_compressed.mp4"
os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

with open(compressed_path, 'rb') as fh:
    data_url = "data:video/mp4;base64," + b64encode(fh.read()).decode()
HTML("""
<video width=1280 controls>
<source src="%s" type="video/mp4">
</video>
""" % data_url)
""" % data_url)Second model - yolov8n
from ultralytics import YOLO
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from google.colab.patches import cv2_imshow

# Download the pretrained YOLOv8-nano checkpoint, then load the copy kept
# on Drive so later sessions don't re-download.
model = YOLO('yolov8n.pt')
model = YOLO('/content/drive/MyDrive/yolov8n.pt')

# Test image
results = model.predict('/content/drive/MyDrive/frames/frame0.jpg')
results[0].boxes[0].xyxy  # first detection's (x1, y1, x2, y2)

for i, r in enumerate(results):
    # fix: dropped the unused r.plot()/Image.fromarray conversions —
    # r.show() and r.save() render and handle channel order themselves.
    r.show()
    r.save(filename=f'/content/drive/MyDrive/test{i}.jpg')

# Loop through each frame
# Run the detector over every frame and save the annotated images.
for i in range(450):
    results = model.predict(
        f'/content/drive/MyDrive/frames/frame{i}.jpg', conf=0.5, line_width=1
    )
    for r in results:
        # fix: dropped the unused r.plot()/Image.fromarray results —
        # r.save() draws the boxes and writes the file itself.
        r.save(filename=f'/content/drive/MyDrive/yolo_v8_results/output_frame{i}.jpg')

# Convert into video
import cv2
import os

# Stitch the YOLOv8-annotated frames into an .mp4.
frames_dir = "/content/drive/MyDrive/yolo_v8_results"
output_video_path = "/content/drive/MyDrive/yolo_v8_video.mp4"

# fix: the conventional FourCC for .mp4 output is lowercase "mp4v"
codec = cv2.VideoWriter_fourcc(*"mp4v")
fps = 30

# Use the first frame's dimensions to configure the video writer.
first_frame_path = os.path.join(frames_dir, "output_frame0.jpg")
first_frame = cv2.imread(first_frame_path)
if first_frame is None:  # cv2.imread returns None on a missing/bad file
    raise FileNotFoundError(first_frame_path)
height, width, _ = first_frame.shape

out = cv2.VideoWriter(output_video_path, codec, fps, (width, height))
# Write consecutive frames until the sequence ends.
i = 0
while True:
    frame_path = os.path.join(frames_dir, f"output_frame{i}.jpg")
    if not os.path.exists(frame_path):
        break
    out.write(cv2.imread(frame_path))
    i += 1
out.release()

from IPython.display import HTML
from base64 import b64encode
import os

# Input video path
# fix: the writer above saved "yolo_v8_video.mp4"; the previous
# "yolov8_video.mp4" path pointed at a file that does not exist.
save_path = "/content/drive/MyDrive/yolo_v8_video.mp4"
# Compressed (browser-playable H.264) video path
compressed_path = "/content/drive/MyDrive/yolo_v8_video_compressed.mp4"
os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

# Embed the video inline as a base64 data URL.
mp4 = open(compressed_path, 'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=1280 controls>
<source src="%s" type="video/mp4">
</video>
""" % data_url)
""" % data_url)Third Model - Ball and player joint detection
# Third model: custom weights (best.pt) trained to detect players,
# referees and the ball jointly.
model = YOLO('/content/drive/MyDrive/best.pt')
results = model.predict('/content/drive/MyDrive/frames/frame0.jpg', conf=0.5, line_width=1)

# Explore the structure of the output: corners of the bounding boxes.
for result in results:
    boxes = result.boxes
    conf = result.boxes.conf  # fix: was `result.boxes` — grab the confidence tensor
    # NOTE(review): indexing [1] assumes at least two detections — confirm.
    x1, y1, x2, y2 = boxes.xyxy[1]
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    corners = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
corners  # e.g. [(744, 433), (776, 433), (776, 504), (744, 504)]
Display the original image from the output
import matplotlib.pyplot as plt
import cv2

# Draw the 4th detection's bounding box on the original image that the
# result object carries (results[0].orig_img is BGR, as loaded by OpenCV).
x1, y1, x2, y2 = boxes.xyxy[3]
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
corners = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
image = results[0].orig_img
# fix: removed np.transpose(image, (0, 1, 2)) — an identity permutation, a no-op
# fix: convert BGR->RGB once (the original converted twice)
img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
top_left = (x1, y1)
bottom_right = (x2, y2)
# Draw the rectangle and display with matplotlib.
cv2.rectangle(img_rgb, top_left, bottom_right, (0, 255, 0), 2)
plt.imshow(img_rgb)
plt.show()

# Display from the folder
img = cv2.imread('/content/drive/MyDrive/frames/frame0.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.show()
import cv2
import os

# Stitch the third model's annotated frames into an .mp4.
frames_dir = "/content/drive/MyDrive/yolo_spec_results"
output_video_path = "/content/drive/MyDrive/yolospec_video.mp4"

# fix: the conventional FourCC for .mp4 output is lowercase "mp4v"
codec = cv2.VideoWriter_fourcc(*"mp4v")
fps = 30

# Use the first frame's dimensions to configure the video writer.
first_frame_path = os.path.join(frames_dir, "output_frame0.jpg")
first_frame = cv2.imread(first_frame_path)
if first_frame is None:  # cv2.imread returns None on a missing/bad file
    raise FileNotFoundError(first_frame_path)
height, width, _ = first_frame.shape

out = cv2.VideoWriter(output_video_path, codec, fps, (width, height))
# Write consecutive frames until the sequence ends.
i = 0
while True:
    frame_path = os.path.join(frames_dir, f"output_frame{i}.jpg")
    if not os.path.exists(frame_path):
        break
    out.write(cv2.imread(frame_path))
    i += 1
out.release()

from IPython.display import HTML
from base64 import b64encode
import os

# Transcode to H.264 so the notebook's <video> tag can play it, then embed
# the result inline as a base64 data URL.
save_path = "/content/drive/MyDrive/yolospec_video.mp4"
compressed_path = "/content/drive/MyDrive/yolospec_video_compressed.mp4"
os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

with open(compressed_path, 'rb') as fh:
    data_url = "data:video/mp4;base64," + b64encode(fh.read()).decode()
HTML("""
<video width=1280 controls>
<source src="%s" type="video/mp4">
</video>
""" % data_url)
""" % data_url)Summary of the results at the point:
The first model cannot identify all objects of interest in the frame. In fact, it fails to identify a significant number of players in the frame.
The second model can identify almost every player in the frame. One problem is that it is not trained to detect the ball in the frame.
However, it is extremely time-consuming to build a training data set on our own by labeling each object of interest, and there is no well-structured, high-quality dataset for this task. Fortunately, we found a model that can successfully detect the ball.
Currently, it is in fact not very crucial that the pretrained model provide the correct labeling: as long as it is able to draw decent bounding boxes for each object of interest, we can customize the labeling and other classification tasks on our own.
First stage progress in classifying the team
It is important to note that the pretrained model only classifies between humans and objects; it does not distinguish individual players or teams. For the classification between teams, we could try to classify by the color of the jerseys.
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image

model = YOLO('/content/drive/MyDrive/best.pt')

# Reference jersey colors, given as RGB triples.
# NOTE(review): presumably *_g and *_p distinguish two kit variants per
# team (e.g. goalkeeper vs. outfield) — confirm against the footage.
team1 = 'Chelsea'
ch_g = [192, 54, 69]
ch_p = [46, 69, 121]
team2 = 'Manchester City'
man_g = [237, 255, 42]
man_p = [144, 170, 187]

# Basic labeling and team classification
# Annotate every frame: each detected player (class 0) is assigned to a
# team by comparing the RGB color at the center of its bounding box with
# the reference jersey colors; referees (class 1) get black boxes and the
# ball (class 2) gets a gray box.
for i in range(450):
    # prediction
    results = model.predict(f'/content/drive/MyDrive/frames/frame{i}.jpg', conf=0.5, line_width=1)
    # pull out the elements in the output
    boxes = results[0].boxes
    image = results[0].orig_img
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    for d in range(len(boxes.xyxy)):
        cls = boxes.cls[d].cpu().numpy()
        # corners shared by all three classes (hoisted out of each branch)
        x1, y1, x2, y2 = boxes.xyxy[d]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        top_left = (x1, y1)
        bottom_right = (x2, y2)
        label_pos = (x1, y1 - 10)
        conf_txt = f" {boxes.conf[d]:.2f}"
        if cls == 0:
            # color channels of the box-center pixel, for team comparison
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            rgb = [image[center_y, center_x, e] for e in (0, 1, 2)]
            # Euclidean distance between the center-pixel color and each
            # pre-specified jersey color; the nearest reference wins.
            palette = [ch_g, ch_p, man_g, man_p]
            col_diff = [np.linalg.norm(np.array(rgb) - np.array(p)) for p in palette]
            # fix: was a scan over all indices equal to the minimum, which
            # drew the label twice on ties; argmin picks exactly one.
            g = int(np.argmin(col_diff))
            if g >= 2:
                color = (man_p[0], man_p[1], man_p[2])
                cv2.putText(image, f'{team2}' + conf_txt, label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 1)
            else:
                color = (ch_p[0], ch_p[1], ch_p[2])
                cv2.putText(image, f'{team1}' + conf_txt, label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 1)
            cv2.rectangle(image, top_left, bottom_right, color, 1)
        elif cls == 1:  # referee
            cv2.rectangle(image, top_left, bottom_right, (0, 0, 0), 2)
            cv2.putText(image, 'Ref' + conf_txt, label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 0), 1)
        elif cls == 2:  # ball
            cv2.rectangle(image, top_left, bottom_right, (160, 160, 160), 2)
            cv2.putText(image, 'Ball' + conf_txt, label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.9, (160, 160, 160), 1)
    # save the annotated frame
    image = Image.fromarray(image)
    image.save(f'/content/drive/MyDrive/yolo_spec_team_results/output_frame{i}.jpg', format='JPEG')
Sample result
# Display the first annotated frame (convert BGR -> RGB for matplotlib).
img = cv2.imread('/content/drive/MyDrive/yolo_spec_team_results/output_frame0.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.show()
import cv2
import os

# Stitch the team-annotated frames into an .mp4.
frames_dir = "/content/drive/MyDrive/yolo_spec_team_results"
output_video_path = "/content/drive/MyDrive/yolospec_team_video.mp4"

# fix: the conventional FourCC for .mp4 output is lowercase "mp4v"
codec = cv2.VideoWriter_fourcc(*"mp4v")
fps = 30

# Use the first frame's dimensions to configure the video writer.
first_frame_path = os.path.join(frames_dir, "output_frame0.jpg")
first_frame = cv2.imread(first_frame_path)
if first_frame is None:  # cv2.imread returns None on a missing/bad file
    raise FileNotFoundError(first_frame_path)
height, width, _ = first_frame.shape

out = cv2.VideoWriter(output_video_path, codec, fps, (width, height))
# Write consecutive frames until the sequence ends.
i = 0
while True:
    frame_path = os.path.join(frames_dir, f"output_frame{i}.jpg")
    if not os.path.exists(frame_path):
        break
    out.write(cv2.imread(frame_path))
    i += 1
out.release()

from IPython.display import HTML
from base64 import b64encode
import os

# Transcode to H.264 so the notebook's <video> tag can play it, then embed
# the result inline as a base64 data URL.
save_path = "/content/drive/MyDrive/yolospec_team_video.mp4"
compressed_path = "/content/drive/MyDrive/yolospec_team_video_compressed.mp4"
os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

with open(compressed_path, 'rb') as fh:
    data_url = "data:video/mp4;base64," + b64encode(fh.read()).decode()
HTML("""
<video width=1280 controls>
<source src="%s" type="video/mp4">
</video>
""" % data_url)